# load library
library(SingleCellExperiment)
library(Seurat)
library(dplyr)
library(Seurat)
library(patchwork)
library(tidyverse)
library(Matrix)
library(scales)
library(cowplot)
library(RCurl)
library(ggplot2)
library(sctransform)
library(dittoSeq)
#load SCT
norm_seurat <- readRDS("processed_data_DEA/normalized_splitseurat.rds")

Data normalization, variable feature selection, integration, and filters out doublets

# Normalize and identify variable features for each dataset independently
norm_seurat <- lapply(X = norm_seurat, FUN = function(x) {
   x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

# Select the most variable features to use for integration
integ_features <- SelectIntegrationFeatures(object.list = norm_seurat, 
                                            nfeatures = 2000) 
# Transform object for integration
# Prepare the SCT list object for integration
norm_seurat <- PrepSCTIntegration(object.list = norm_seurat, 
                                   anchor.features = integ_features)

# Perform CCA, find the best buddies or anchors and filter incorrect anchors.
# Find best buddies - can take a while to run
integ_anchors <- FindIntegrationAnchors(object.list = norm_seurat, 
                                        normalization.method = "SCT", 
                                        anchor.features = integ_features)
# Integrate across sampleid
seurat_integrated <- IntegrateData(anchorset = integ_anchors, 
                                   normalization.method = "SCT")
# Run PCA

DefaultAssay(seurat_integrated) <- "RNA"

seurat_integrated <- subset(seurat_integrated, doublets == "Singlet")
# Run the standard workflow for visualization and clustering
seurat_integrated <- FindVariableFeatures(seurat_integrated, selection.method = "vst", nfeatures = 2000)
seurat_integrated <- ScaleData(seurat_integrated, verbose = FALSE)
seurat_integrated <- RunPCA(seurat_integrated, npcs = 30, verbose = FALSE,features = VariableFeatures(object = seurat_integrated))
ElbowPlot(seurat_integrated)

seurat_integrated <- FindNeighbors(seurat_integrated,  dims = 1:20)
seurat_integrated <- FindClusters(seurat_integrated, resolution =c(0.1,0.4,0.6)) 
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 15169
## Number of edges: 505059
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9656
## Number of communities: 15
## Elapsed time: 2 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 15169
## Number of edges: 505059
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9215
## Number of communities: 23
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 15169
## Number of edges: 505059
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9058
## Number of communities: 28
## Elapsed time: 2 seconds
seurat_integrated <- RunUMAP(seurat_integrated, reduction = "pca", dims = 1:20)
PCAPlot(seurat_integrated,split.by = "condition")

DimPlot(seurat_integrated, label = TRUE) + NoLegend()

DimPlot(seurat_integrated, split.by = "condition")

# Session Info

#date
format(Sys.time(), "%d %B, %Y, %H,%M")
## [1] "05 October, 2023, 09,42"
#Packages used
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /depot/rhel/easybuild/software/OpenBLAS/0.3.12-GCC-10.2.0/lib/libopenblas_haswellp-r0.3.12.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] dittoSeq_1.6.0              sctransform_0.3.5          
##  [3] RCurl_1.98-1.12             cowplot_1.1.1              
##  [5] scales_1.2.1                Matrix_1.6-1               
##  [7] lubridate_1.9.2             forcats_1.0.0              
##  [9] stringr_1.5.0               purrr_1.0.2                
## [11] readr_2.1.4                 tidyr_1.3.0                
## [13] tibble_3.2.1                ggplot2_3.4.3              
## [15] tidyverse_2.0.0             patchwork_1.1.3            
## [17] dplyr_1.1.3                 SeuratObject_4.1.3         
## [19] Seurat_4.3.0.1              SingleCellExperiment_1.16.0
## [21] SummarizedExperiment_1.24.0 Biobase_2.54.0             
## [23] GenomicRanges_1.46.1        GenomeInfoDb_1.30.1        
## [25] IRanges_2.28.0              S4Vectors_0.32.4           
## [27] BiocGenerics_0.40.0         MatrixGenerics_1.6.0       
## [29] matrixStats_1.0.0           knitr_1.44                 
## 
## loaded via a namespace (and not attached):
##   [1] plyr_1.8.8             igraph_1.5.1           lazyeval_0.2.2        
##   [4] sp_2.0-0               splines_4.1.0          listenv_0.9.0         
##   [7] scattermore_1.2        digest_0.6.33          htmltools_0.5.6       
##  [10] fansi_1.0.4            magrittr_2.0.3         tensor_1.5            
##  [13] cluster_2.1.4          ROCR_1.0-11            tzdb_0.4.0            
##  [16] globals_0.16.2         timechange_0.2.0       spatstat.sparse_3.0-2 
##  [19] colorspace_2.1-0       ggrepel_0.9.3          xfun_0.40             
##  [22] jsonlite_1.8.7         progressr_0.14.0       spatstat.data_3.0-1   
##  [25] survival_3.5-5         zoo_1.8-12             glue_1.6.2            
##  [28] polyclip_1.10-4        gtable_0.3.4           zlibbioc_1.40.0       
##  [31] XVector_0.34.0         leiden_0.4.3           DelayedArray_0.20.0   
##  [34] future.apply_1.11.0    abind_1.4-5            pheatmap_1.0.12       
##  [37] spatstat.random_3.1-6  miniUI_0.1.1.1         Rcpp_1.0.11           
##  [40] viridisLite_0.4.2      xtable_1.8-4           reticulate_1.32.0     
##  [43] htmlwidgets_1.6.2      httr_1.4.7             RColorBrewer_1.1-3    
##  [46] ellipsis_0.3.2         ica_1.0-3              farver_2.1.1          
##  [49] pkgconfig_2.0.3        sass_0.4.7             uwot_0.1.16           
##  [52] deldir_1.0-9           utf8_1.2.3             labeling_0.4.3        
##  [55] tidyselect_1.2.0       rlang_1.1.1            reshape2_1.4.4        
##  [58] later_1.3.1            munsell_0.5.0          tools_4.1.0           
##  [61] cachem_1.0.8           cli_3.6.1              generics_0.1.3        
##  [64] ggridges_0.5.4         evaluate_0.21          fastmap_1.1.1         
##  [67] yaml_2.3.7             goftest_1.2-3          fitdistrplus_1.1-11   
##  [70] RANN_2.6.1             pbapply_1.7-2          future_1.33.0         
##  [73] nlme_3.1-162           mime_0.12              compiler_4.1.0        
##  [76] rstudioapi_0.15.0      plotly_4.10.2          png_0.1-8             
##  [79] spatstat.utils_3.0-3   bslib_0.5.1            stringi_1.7.12        
##  [82] lattice_0.20-45        vctrs_0.6.3            pillar_1.9.0          
##  [85] lifecycle_1.0.3        spatstat.geom_3.2-5    lmtest_0.9-40         
##  [88] jquerylib_0.1.4        RcppAnnoy_0.0.21       data.table_1.14.8     
##  [91] bitops_1.0-7           irlba_2.3.5.1          httpuv_1.6.11         
##  [94] R6_2.5.1               promises_1.2.1         KernSmooth_2.23-20    
##  [97] gridExtra_2.3          parallelly_1.36.0      codetools_0.2-19      
## [100] MASS_7.3-58.3          withr_2.5.0            GenomeInfoDbData_1.2.7
## [103] parallel_4.1.0         hms_1.1.3              grid_4.1.0            
## [106] rmarkdown_2.24         Rtsne_0.16             spatstat.explore_3.2-3
## [109] shiny_1.7.5

  1. Center for Integrative Brain Research, Seattle Children’s Research Institute, 1900 9th Ave, Seattle, WA, USA, ↩︎